from __future__ import unicode_literals
import requests
from bs4 import BeautifulSoup
import time
import json

# Starting page of the "best" listing; `url` below is advanced page by page.
base_url = 'http://hot-gif.com/best/'
tag = ''

# Accumulated [src, kind] pairs, where kind is 'image' or 'video'.
img_url_list = []


url = 'http://hot-gif.com/best/'
file_name = 'hot_gif.json'

count = 0  # entries collected since the last flush to disk


def _flush(path, entries):
    """Overwrite *path* with the full entry list as UTF-8 JSON.

    encoding='utf-8' is required because ensure_ascii=False may emit
    non-ASCII characters that the platform default codec cannot encode.
    """
    with open(path, mode='w', encoding='utf-8') as file:
        file.write(json.dumps(entries, ensure_ascii=False))


def _scrape():
    """Walk the paginated listing, collecting image/video URLs.

    Follows the anchor with id="next" until it disappears, appending every
    matching <img class="photo gif"> and <source type="video/mp4"> src to
    the module-level ``img_url_list``.  Flushes the accumulated list to
    disk whenever more than 200 new entries have been gathered.  Network
    errors are logged and the same page is retried, but a run of
    consecutive failures aborts the loop so a dead site cannot leave the
    script spinning forever.
    """
    global url, count
    consecutive_failures = 0
    mark = True
    while mark:
        try:
            r = requests.get(url, timeout=5)
            soup = BeautifulSoup(r.content, 'lxml')

            # Advance to the next page; stop when the "next" link is gone.
            a_next = soup.find('a', attrs={'id': 'next'})
            if a_next is None:
                mark = False
            else:
                url = a_next['href']

            # Collect GIF image URLs ...
            for img in soup.find_all('img', attrs={'class': 'photo gif'}):
                img_url_list.append([img['src'], 'image'])
                count += 1

            # ... and mp4 video URLs.
            for video in soup.find_all('source', attrs={'type': 'video/mp4'}):
                img_url_list.append([video['src'], 'video'])
                count += 1

            if count > 200:
                print('start write to disk...' + str(count))
                _flush(file_name, img_url_list)
                count = 0

            consecutive_failures = 0
        except Exception as e:
            # Best-effort: log the error and the page that failed, then
            # retry — but give up after repeated back-to-back failures
            # instead of looping forever against an unreachable site.
            print(e)
            print(url)
            consecutive_failures += 1
            if consecutive_failures >= 5:
                mark = False
        finally:
            time.sleep(2)  # polite crawl delay between page fetches
            print(str(count), end='\r')


if __name__ == '__main__':
    _scrape()
    print('退出url' + url)
    print('start write to disk...' + str(count))
    # Final flush of everything gathered since the last periodic write.
    _flush(file_name, img_url_list)